import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from pathlib import Path


class TestSpider(CrawlSpider):
    """Fetch the URLs in ``start_urls`` and dump each response body to disk.

    No crawl rules are active (the ``rules`` line below is commented out), so
    the spider only visits ``start_urls``. ``CrawlSpider`` hands those
    responses to ``parse_start_url``, not to rule callbacks, so that hook is
    overridden here to forward to ``parse_item``; without it ``parse_item``
    would never run.
    """

    name = "test"
    allowed_domains = ["zhihu.com"]
    start_urls = ["https://api.zhihu.com/education/training/1760689372985630721/video_page/catalog?limit=128&offset=0&education_channel_code=ZHZN-d62bb90dfad9e02"]

    # Link-following rule is currently disabled; only start_urls are fetched.
    #rules = (Rule(LinkExtractor(allow=r"Items/"), callback="parse_item", follow=True),)

    def parse_start_url(self, response, **kwargs):
        """Route responses for ``start_urls`` to ``parse_item``.

        Args:
            response: The response downloaded for one of ``start_urls``.
            **kwargs: Extra callback keyword arguments passed by Scrapy;
                accepted for signature compatibility and not used.

        Returns:
            Whatever ``parse_item`` returns for ``response``.
        """
        return self.parse_item(response)

    def parse_item(self, response):
        """Write the raw response body to ``api.html`` and return an item.

        Each call overwrites ``api.html`` in the current working directory.

        Args:
            response: The downloaded response whose ``body`` bytes are saved.

        Returns:
            dict: The scraped item; currently always empty because the field
            extraction below is disabled.
        """
        item = {}
        # Example field extraction, left disabled:
        #item["domain_id"] = response.xpath('//input[@id="sid"]/@value').get()
        #item["name"] = response.xpath('//div[@id="name"]').get()
        #item["description"] = response.xpath('//div[@id="description"]').get()
        filename = "api.html"
        # Persist the undecoded bytes exactly as received.
        Path(filename).write_bytes(response.body)
        self.log(f"Saved file {filename}")

        return item
